Load data

metasoft_jar = "~/tools/Metasoft/Metasoft.jar"
metasoft_HanEskinPvalueTable = "~/tools/Metasoft/HanEskinPvalueTable.txt"

metasoft_output_all = data.frame()
for(pheno_i in rownames(pheno_data)){
  res_ROS_study_i = res_ROS[res_ROS$pheno == pheno_i,]
  res_ROS_study_i$chr = gsub("(.*?) (.*?):(.*)","\\2",res_ROS_study_i$sv_info)
  res_ROS_study_i = res_ROS_study_i[!res_ROS_study_i$chr %in% c("chrY","chrX"), ]
  
  res_MAP_study_i = res_MAP[res_MAP$pheno == pheno_i,]
  SVs_in_both = intersect(res_ROS_study_i$ID, res_MAP_study_i$ID)
  res_ROS_study_i_match = res_ROS_study_i[match(SVs_in_both,res_ROS_study_i$ID),]
  res_MAP_study_i_match = res_MAP_study_i[match(SVs_in_both,res_MAP_study_i$ID),]
  
  # Input File format:
  # Rows are SNPs.
  # 1st column is RSID.
  # 2nd and 3rd columns are effect size (beta) and its standard error of study 1.
  # 4th and 5th columns are effect size (beta) and its standard error of study 2.
  # Please write “NA” for missing beta and its standard error.
  
  metasoft_input = res_ROS_study_i_match[,c("ID","Estimate","Std. Error")] %>%
    full_join(res_MAP_study_i_match[,c("ID","Estimate","Std. Error")], by = "ID")
  
  metasoft_input_file = paste0(data.dir,"/metasoft_input_",pheno_i,".tsv")
  
  fwrite(metasoft_input, 
         file = metasoft_input_file, sep = "\t",
         quote = F, col.names = F, row.names = F)
  
  metasoft_output_file = paste0(data.dir,"/metasoft_output_",pheno_i,".tsv")
  metasoft_log_file = paste0(data.dir,"/metasoft_log_",pheno_i,".log")
  metasoft_cmd = paste("java -jar", metasoft_jar, 
                       "-input", metasoft_input_file,
                       "-output", metasoft_output_file,
                       "-log", metasoft_log_file,
                       "-pvalue_table", metasoft_HanEskinPvalueTable)
  system(metasoft_cmd)
  metasoft_output = fread(metasoft_output_file)
  metasoft_output$pheno = pheno_i
  metasoft_output_all = bind_rows(metasoft_output_all, metasoft_output)
}
save(metasoft_output_all, file = paste0(data.dir,"/metasoft_output_all.RData"))

Meta-analysis Results

load(paste0(data.dir,"/metasoft_output_all.RData"))

vcf.meta$sv_info = paste0(vcf.meta$SVTYPE, " chr", vcf.meta$CHROM, ":", vcf.meta$POS, " (len:", vcf.meta$SVLEN, " maf:", vcf.meta$MAF,")")

res_meta = vcf.meta[,c("ID","sv_info","closest_gene")] %>% 
  inner_join(metasoft_output_all, by = c("ID"="RSID")) %>%
  left_join(pheno_data %>% rownames_to_column("pheno"), by = c("pheno")) %>%
  filter(pheno %in% names(pheno_list)) %>%
  select(-c(PVALUE_BE,`PVALUES_OF_STUDIES(Tab_delimitered)`,`MVALUES_OF_STUDIES(Tab_delimitered)`,V19,V20,V21,family)) %>%
  arrange(PVALUE_RE2) 

res_final = add_LD_info(res_meta)

data.table::fwrite(res_final, file = paste0(data.dir,"/Meta_analysis_results.tsv.gz"))
createDT(res_final %>% head(1000))
res_final$pval = res_final$PVALUE_RE2

ci = 0.95
df_m = res_final %>% 
  group_by(pheno) %>%
  mutate(observed = -log10(sort(pval)),
         expected = -log10(ppoints(n())),
         clower   = -log10(qbeta(p = (1 - ci) / 2, shape1 = 1:n(), shape2 = n():1)),
         cupper   = -log10(qbeta(p = (1 + ci) / 2, shape1 = 1:n(), shape2 = n():1)),
         lambda   = median(qchisq(1 - pval, 1)) / qchisq(0.5, 1)) %>%
  reframe(lambda = unique(lambda), lambda_label = sprintf("%s (λ = %.2f)", pheno, lambda)) %>% distinct()

data.table::fwrite(df_m, file = paste0(data.dir,"/Meta_analysislambdas.tsv.gz"))
createDT(df_m)
res_final$category = NULL
res_final$family = NULL
res_final$description = NULL

plot_multitrait_manhattan(res_final, pheno_label = "description")

Session info

sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Stream 8
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] ggrepel_0.9.5     data.table_1.15.4 vcfR_1.15.0       lubridate_1.9.3  
##  [5] forcats_1.0.0     stringr_1.5.1     dplyr_1.1.4       purrr_1.0.2      
##  [9] readr_2.1.5       tidyr_1.3.1       tibble_3.2.1      ggplot2_3.5.1    
## [13] tidyverse_2.0.0  
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.13        ape_5.8            lattice_0.20-45    digest_0.6.36     
##  [5] utf8_1.2.4         R6_2.5.1           evaluate_0.24.0    highr_0.11        
##  [9] pillar_1.9.0       rlang_1.1.4        rstudioapi_0.16.0  vegan_2.6-6.1     
## [13] jquerylib_0.1.4    R.utils_2.12.3     R.oo_1.26.0        Matrix_1.6-5      
## [17] DT_0.33            rmarkdown_2.27     labeling_0.4.3     splines_4.1.2     
## [21] pinfsc50_1.3.0     htmlwidgets_1.6.4  munsell_0.5.1      compiler_4.1.2    
## [25] xfun_0.46          pkgconfig_2.0.3    mgcv_1.9-1         htmltools_0.5.8.1 
## [29] tidyselect_1.2.1   fansi_1.0.6        permute_0.9-7      viridisLite_0.4.2 
## [33] tzdb_0.4.0         withr_3.0.0        MASS_7.3-60.0.1    R.methodsS3_1.8.2 
## [37] grid_4.1.2         nlme_3.1-153       jsonlite_1.8.8     gtable_0.3.5      
## [41] lifecycle_1.0.4    magrittr_2.0.3     scales_1.3.0       cli_3.6.3         
## [45] stringi_1.8.4      cachem_1.1.0       farver_2.1.2       bslib_0.8.0       
## [49] generics_0.1.3     vctrs_0.6.5        ggeasy_0.1.4       RColorBrewer_1.1-3
## [53] tools_4.1.2        glue_1.7.0         crosstalk_1.2.1    hms_1.1.3         
## [57] parallel_4.1.2     fastmap_1.2.0      yaml_2.3.10        timechange_0.3.0  
## [61] colorspace_2.1-1   cluster_2.1.2      knitr_1.48         sass_0.4.9